import requests
import time
import re
from house_data.configs import config
import data_connect


def get_data(url, headers, timeout=10):
    """Fetch *url* with a GET request and return the response body as text.

    Args:
        url: Address of the page to download.
        headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The response text when the server answers with status 200;
        otherwise ``None`` (the status code is printed for diagnosis).

    Raises:
        requests.RequestException: On connection failures or when the
            timeout elapses; these are not caught here.
    """
    # NOTE(review): verify=False turns off TLS certificate verification.
    # It is kept as in the original call; confirm it is intentional.
    response = requests.get(
        url, headers=headers, verify=False, timeout=timeout
    )
    if response.status_code == 200:
        return response.text
    print(response.status_code)
    return None


def parse_data_url(html, patten):
    """Return all non-overlapping matches of *patten* found in *html*.

    The pattern is compiled with ``re.S`` so that ``.`` also matches
    newline characters. The result has the same shape as ``re.findall``:
    a list of strings, or a list of tuples when the pattern has several
    groups.
    """
    return re.compile(patten, re.S).findall(html)


# Number of consecutive entries taken from the page for each ranking list.
_RANK_SIZE = 10

# Destination tables, in the order their entries are sliced from the
# page-wide match lists (first ten entries, next ten, and so on).
_RANK_TABLES = (
    'girl_hot_range',  # female-channel hot list
    'boy_hot_range',   # male-channel hot list
    'girl_new_range',  # female-channel new books
    'boy_new_range',   # male-channel new books
)


def _first_match(text, patten):
    """Return the first match of *patten* in *text*, or None if there is none."""
    found = parse_data_url(text, patten)
    return found[0] if found else None


def _extract_column(html, span_patten, value_pattens):
    """Build one column of values from every fragment matching *span_patten*.

    Each fragment is narrowed by applying *value_pattens* in sequence, always
    keeping the first match. A fragment that fails to match yields ``None``
    instead of being dropped, so positions stay aligned across columns.
    """
    column = []
    for fragment in parse_data_url(html, span_patten):
        value = fragment
        for patten in value_pattens:
            value = _first_match(value, patten)
            if value is None:
                break
        column.append(value)
    return column


def _store_ranking(table, names, hots, writers):
    """Print one ranking's columns and insert its complete rows into *table*."""
    print(names)
    print(hots)
    print(writers)
    if min(len(names), len(hots), len(writers)) < _RANK_SIZE:
        print(f'{table}: fewer than {_RANK_SIZE} entries found on the page')
    # The table name comes from the fixed _RANK_TABLES tuple; the row values
    # are passed separately so the database layer can bind them.
    sql = f'insert into {table}(book_name,book_writer,book_hot) values (%s,%s,%s);'
    # zip stops at the shortest column, so a short page cannot cause an
    # IndexError part-way through the inserts.
    for name, writer, hot in zip(names, writers, hots):
        if name is None or writer is None or hot is None:
            print(f'{table}: skipping incomplete row {(name, writer, hot)!r}')
            continue
        try:
            hot_value = float(hot)
        except ValueError:
            print(f'{table}: skipping row with non-numeric heat {hot!r}')
            continue
        data_connect.insert_data(sql, (str(name), str(writer), hot_value))


def main():
    """Scrape the ranking page and store its four ranking lists.

    Waits five seconds, downloads the page configured in ``config``, extracts
    the title, heat and author columns with regular expressions, then writes
    each consecutive group of ``_RANK_SIZE`` entries to its table through
    ``data_connect.insert_data``. Nothing is stored when the download returns
    no content; rows with a missing or non-numeric field are reported and
    skipped.
    """
    time.sleep(5)
    print("正在抓取")
    book_url = config.URL()
    headers = config.header
    html = get_data(book_url, headers)
    if html is None:
        # get_data yields None for a non-200 response; the regex helpers
        # would raise TypeError on it.
        print('no page content returned; nothing to store')
        return

    # Book titles: first match of the title pattern inside each title span.
    name_list = _extract_column(
        html,
        '<span class="s-tit" data-v-bc532ab8 data-v-7af1491d>.*?</a></span>',
        ('[\u4e00-\u9fa5]+.*?[\u4e00-\u9fa5]+.*?[\u4e00-\u9fa5]+',),
    )

    # Heat values: take the text between the tag brackets of each <em>,
    # then the first numeric run inside it.
    num_list = _extract_column(
        html,
        '<em data-v-bc532ab8 data-v-7af1491d>.*?</em>',
        ('>.*?<', '[0-9]+.*?[0-9]+'),
    )

    # Authors: first run of CJK characters inside each description span.
    writer_list = _extract_column(
        html,
        '<span class="s-des" data-v-bc532ab8 data-v-7af1491d>.*?</a></span>',
        ('[\u4e00-\u9fa5]+',),
    )

    # Split the page-wide columns into the per-category rankings.
    for index, table in enumerate(_RANK_TABLES):
        window = slice(index * _RANK_SIZE, (index + 1) * _RANK_SIZE)
        _store_ranking(
            table, name_list[window], num_list[window], writer_list[window]
        )


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
